# -*- coding: utf-8 -*-
"""
Created on Tue Jun 16 06:29:09 2020

@author: HP
"""


from os.path import isfile
import pandas as pd
import numpy as np


#=============================================================================
#INPUT FILE
#=============================================================================
df = pd.read_csv( 'ITCR_cdata.csv' )
#Get rid of no risk questions
df = df[ (df['pz'] != 66666.) & (df['pw'] != 66666.) ]
ids = sorted( set( df['participantid'] ) )

#=============================================================================
#RESULT FILE
#=============================================================================

columns = ['participantid', 'fosd good', 'fosd bad', 'imp good', 'imp bad', \
           'pat good', 'pat bad', 'imp equalp', 'pat equalp', 'cn good', 'cn total', 'ordinary', \
           'giffen' ]

#-------------------
#manage results file
#-------------------
results_file = 'data_validation.xlsx'
#If the file exists then the first part of the script was already run.
if not isfile( results_file ):
    #=============================================================================
    #SIMPLE VALIDATION EXCERCISES
    #=============================================================================
    #Get vars for convenience
    x = df['x']; y = df['y']; z = df['z']; w = df['w'];
    px = df['px']; py = df['py']; pz = df['pz']; pw = df['pw']
    m = px * x + py * y + pz * z + pw * w
    
    #------------------
    #FOSD
    #------------------
    
    m_switched_states = px * z + py * w + pz * x + pw * y
    df['fosd good'] = m < m_switched_states
    df['fosd bad'] = m > m_switched_states
    
    #=============================================================================
    #SUMMARIZE
    #=============================================================================
    
    results_df = df[['participantid', 'fosd good', 'fosd bad']].groupby('participantid').sum()
    
    results_df.to_excel(results_file)
    
results_df = pd.read_excel( results_file, sheet_name = 0, index_col = 0 )

#Update time stuff
#Get vars for convenience
x = df['x']; y = df['y']; z = df['z']; w = df['w'];
px = df['px']; py = df['py']; pz = df['pz']; pw = df['pw']
m = px * x + py * y + pz * z + pw * w

#------------------
#FOSD
#------------------

m_switched_states = px * z + py * w + pz * x + pw * y
df['fosd good'] = m < m_switched_states
df['fosd bad'] = m > m_switched_states

#------------------
#IMPATIENCE
#------------------

imp_state1_good = ( x > y ) & ( px <= py )
imp_state1_bad  = ( y > x ) & ( px <= py )
imp_state2_good = ( z > w ) & ( pz <= pw )
imp_state2_bad  = ( w > z ) & ( pz <= pw )
imp_state1_total = ( px <= py )
imp_state2_total = ( pz <= pw )

df['imp good'] = imp_state1_good.astype('int') + imp_state2_good.astype('int')
df['imp bad']  = imp_state1_bad.astype('int')  + imp_state1_bad.astype('int')
df['imp total'] = imp_state1_total.astype('int') + imp_state2_total.astype('int')

#------------------
#PATIENCE
#------------------

pat_state1_good = ( x < y ) & ( px >= py )
pat_state1_bad  = ( y < x ) & ( px >= py )
pat_state2_good = ( z < w ) & ( pz >= pw )   
pat_state2_bad  = ( w < z ) & ( pz >= pw )
pat_state1_total = ( px >= py )
pat_state2_total = ( pz >= pw )

df['pat good'] = pat_state1_good.astype('int') + pat_state2_good.astype('int')
df['pat bad']  = pat_state1_bad.astype('int')  + pat_state1_bad.astype('int')
df['pat total'] = pat_state1_total.astype('int') + pat_state2_total.astype('int')

#------------------
#PAT-IMP SHOWDOWN
#------------------

imp_state1_equalp = (x > y) & (px == py)
pat_state1_equalp = (y > x) & (px == py)
imp_state2_equalp = (z > w) & (pz == pw)
pat_state2_equalp = (w > z) & (pz == pw)
state1_equalp = (px == py)
state2_equalp = (pz == pw)

df['imp equalp'] = imp_state1_equalp.astype('int') + imp_state2_equalp.astype('int')
df['pat equalp'] = pat_state1_equalp.astype('int') + pat_state2_equalp.astype('int')
df['equalp'] = state1_equalp.astype('int') + state2_equalp.astype('int')

#------------------
#CORRELATION NEUTRALITY
#------------------

cn_state1_total = ( px < pz ) & ( py < pw )
cn_state2_total = ( pz < px ) & ( pw < py )
cn_state1_good  = ( x >= z ) & ( y >= w ) & cn_state1_total
cn_state2_good  = ( z >= x ) & ( w >= y ) & cn_state2_total

df['cn good'] = cn_state1_good.astype('int') + cn_state2_good.astype('int')
df['cn total'] = cn_state1_total.astype('int') + cn_state2_total.astype('int')

results_df[['fosd good', 'fosd bad', 'imp good', 'imp bad', 'imp total', 'pat good', 'pat bad', 'pat total', 'imp equalp', 'pat equalp', 'equalp', 'cn good', 'cn total']] \
    = df[['participantid', 'fosd good', 'fosd bad', 'imp good', 'imp bad', 'imp total', 'pat good', 'pat bad', 'pat total', 'imp equalp', 'pat equalp', 'equalp', 'cn good', 'cn total']].groupby('participantid').sum()

if 'ordinary' not in results_df.columns:
    #------------------
    #GIFFEN
    #------------------
    
    results_df['ordinary'] = np.nan
    results_df['giffen'] = np.nan
    
    price_dict = { 'x': 'px', 'y': 'py', 'z': 'pz', 'w': 'pw' }
    
    for pid in ids:
        giffen_count = 0; ordinary_count = 0;
        part_df = df[df['participantid'] == pid][['x','y','z','w','px','py','pz','pw']].copy()
        part_df.index = range(41)
        
        #Make a dataframe with states flipped.
        flip_df = part_df.copy()
        flip_df[['x','y','z','w','px','py','pz','pw']] = part_df[['z','w','x','y','pz','pw','px','py']]
    
        for lowi in range(41):
            for highi in range(lowi+1,41):
                #States are normal
                #sameps counts the number of good which have the same price between obs lowi and highi
                sameps = 0
                for goodname in price_dict:
                    p_low = part_df[price_dict[goodname]][lowi]; p_high = part_df[price_dict[goodname]][highi]
                    sameps = sameps + (p_low == p_high)
                if sameps == 3:
                    #Find the mismatched price
                    for goodname in price_dict:
                        pricename = price_dict[goodname]
                        g_low = part_df[goodname][lowi]; g_high = part_df[goodname][highi]
                        p_low = part_df[pricename][lowi]; p_high = part_df[pricename][highi]
                        
                        if p_low != p_high:
                            indicator = (g_high - g_low) * (p_high - p_low )
                            if indicator < 0.:
                                ordinary_count = ordinary_count + 1
                            elif indicator > 0.:
                                giffen_count = giffen_count + 1
                    
                #States are flipped
                sameps = 0
                for goodname in price_dict:
                    p_low = part_df[price_dict[goodname]][lowi]; p_high = flip_df[price_dict[goodname]][highi]
                    sameps = sameps + (p_low == p_high)
                if sameps == 3:
                    #Find the mismatched price
                    for goodname in price_dict:
                        pricename = price_dict[goodname]
                        g_low = part_df[goodname][lowi]; g_high = flip_df[goodname][highi]
                        p_low = part_df[pricename][lowi]; p_high = flip_df[pricename][highi]
                        
                        if p_low != p_high:
                            indicator = (g_high - g_low) * (p_high - p_low )
                            if indicator < 0.:
                                ordinary_count = ordinary_count + 1
                            elif indicator > 0.:
                                giffen_count = giffen_count + 1
        results_df.loc[ pid, 'ordinary' ] = ordinary_count
        results_df.loc[ pid, 'giffen' ] = giffen_count
    results_df.to_excel(results_file)
    
#If results_df has corners or edges vars then drop them.

if 'corners' in results_df.columns:
    results_df.drop('corners',axis=1, inplace=True)
if 'edges' in results_df.columns:
    results_df.drop('edges',axis=1, inplace=True)
#-------------------------
#Add a corner flag. Equals 1 if all rows have a 0. Otherwise returns 1.
#-------------------------

df['edges'] = 0
df['corners'] = 0
zeros_count = (df[['x','y','z','w']] == 0.).sum(1)
df.loc[ zeros_count >= 1, 'edges' ] = 1
df.loc[ zeros_count >= 3, 'corners' ] = 1

char_df = df[['participantid','edges','corners']].groupby('participantid').sum()
results_df = results_df.join(char_df)
results_df.to_excel(results_file)

#=============================================================================
#MAKE SUMMARY TABLE
#=============================================================================
 
summary_file = 'data_validation_summary.xlsx'
   
#columns and row names
cats = ['all', 'edge41', 'edge0']

var_names = [ 'N', 'fosd good', 'fosd bad', 'imp good', 'imp bad', 'imp total', \
             'pat good', 'pat bad', 'pat total', 'imp equalp', 'pat equalp', 'equalp', 'cn good', 'cn total', 'ordinary', 'giffen', ]

results_df['cat'] = 'edge41'
results_df.loc[ results_df['edges'].between(0,40), 'cat' ] = 'edge0'
results_df['N'] = 1

summary_all_df = results_df[var_names].sum(0)

summary_cat_df = results_df[ var_names + ['cat'] ].groupby('cat').sum()
summary_df = pd.concat( [summary_all_df.to_frame('all').transpose(), summary_cat_df],  )

summary_df.to_excel(summary_file)



